import requests
from lxml import etree  ##解析文档  
import pandas as pd
from fake_useragent import UserAgent
from tqdm import tqdm
ua = UserAgent()  # random User-Agent generator used to vary request headers

all_title = []   # module-level accumulator for scraped comment titles
all_time  = []   # module-level accumulator for scraped post times
def crawl_comments(fund_code, comment_max_page):
    """Scrape comment titles and post times from the Eastmoney fund forum.

    @param fund_code: fund code string interpolated into the forum list URL
    @param comment_max_page: number of list pages to fetch (pages 1..comment_max_page)
    @return: pandas DataFrame with 'title' and 'time' columns for THIS call only
    """
    global all_title
    global all_time
    # Collect into locals first: the previous version built the DataFrame
    # directly from the module globals, so a second call returned rows from
    # earlier funds as well, and length drift between the two globals could
    # crash the column assignment.
    titles = []
    times = []
    for page in tqdm(range(1, comment_max_page + 1)):
        url = 'http://guba.eastmoney.com/list,of%s,f_%s.html' % (fund_code, page)
        # Random UA per request to reduce trivial bot blocking; timeout so a
        # stalled server cannot hang the whole crawl.
        response = requests.get(url, headers={'User-Agent': ua.random}, timeout=10)
        # Parse the page; span l3 holds the title link text, l5 the post time.
        root = etree.HTML(response.text)
        titles += root.xpath("//div[contains(@class,'articleh normal_post')]//span[@class='l3']//a//text()")
        times += root.xpath("//div[contains(@class,'articleh normal_post')]//span[@class='l5']//text()")
    # Keep the module-level accumulators updated for backward compatibility
    # with callers that read them directly.
    all_title += titles
    all_time += times
    data_raw = pd.DataFrame()
    data_raw['title'] = titles
    data_raw['time'] = times
    return data_raw
